source('../env.R')
Warning: cannot create dir '/Users/james/Projects/urban_community_structure_wrk', reason 'No such file or directory'Warning: cannot create dir '/Users/james/Projects/urban_community_structure_wrk/geo', reason 'No such file or directory'Warning: cannot create dir '/Users/james/Projects/urban_community_structure_wrk/ebird', reason 'No such file or directory'Warning: cannot create dir '/Users/james/Projects/urban_community_structure_wrk/birdlife', reason 'No such file or directory'Warning: cannot create dir '/Users/james/Projects/urban_community_structure_wrk/auth', reason 'No such file or directory'
It seems reasonable to expect that cities with simialr regional pools will have similar species entering the city, and thus a similar response to urbanisation.
communities = read_csv(filename(COMMUNITY_OUTPUT_DIR, 'communities_for_analysis.csv'))
Rows: 2462 Columns: 9── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): city_name, jetz_species_name, seasonal, presence, origin
dbl (1): city_id
lgl (3): present_urban_high, present_urban_med, present_urban_low
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
communities_summary = communities %>% group_by(city_id) %>% summarise(
regional_pool_size = n(),
urban_size_high = sum(present_urban_high),
urban_size_med = sum(present_urban_med),
urban_size_low = sum(present_urban_low)
)
communities_summary_long = bind_rows(
communities_summary %>% rename(urban_pool_size = 'urban_size_high') %>% dplyr::select(city_id, regional_pool_size, urban_pool_size) %>% mutate(is_urban_threshold = 'High'),
communities_summary %>% rename(urban_pool_size = 'urban_size_med') %>% dplyr::select(city_id, regional_pool_size, urban_pool_size) %>% mutate(is_urban_threshold = 'Medium'),
communities_summary %>% rename(urban_pool_size = 'urban_size_low') %>% dplyr::select(city_id, regional_pool_size, urban_pool_size) %>% mutate(is_urban_threshold = 'Low')
)
communities_summary_long
city_points = st_centroid(read_sf(filename(CITY_DATA_OUTPUT_DIR, 'city_selection.shp')))
Warning: st_centroid assumes attributes are constant over geometriesWarning: st_centroid does not give correct centroids for longitude/latitude data
community_data_metrics = read_csv(filename(COMMUNITY_OUTPUT_DIR, 'community_assembly_metrics.csv')) %>%
dplyr::select(city_id, mntd_normalised, fd_normalised, urban_pool_size, is_urban_threshold) %>%
left_join(read_csv(filename(CITY_DATA_OUTPUT_DIR, 'realms.csv'))) %>%
left_join(communities_summary_long) %>%
left_join(city_points[,c('city_id', 'city_nm')]) %>%
rename(city_name='city_nm') %>%
mutate(is_urban_threshold = factor(is_urban_threshold, levels = c('low', 'medium', 'high'), labels = c('Low', 'Medium', 'High'), ordered = T)) %>%
arrange(city_id)
Rows: 792 Columns: 93── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): is_urban_threshold
dbl (92): city_id, mntd_normalised, mntd_actual, mntd_min, mntd_max, mntd_mean, mntd_sd, fd_normalised, fd_actual, fd_min, fd_max, fd_m...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 342 Columns: 2── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): core_realm
dbl (1): city_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Joining with `by = join_by(city_id)`Joining with `by = join_by(city_id, urban_pool_size, is_urban_threshold)`Joining with `by = join_by(city_id)`
community_data_metrics
Load trait data
traits = read_csv(filename(TAXONOMY_OUTPUT_DIR, 'traits_jetz.csv'))
Rows: 304 Columns: 5── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): jetz_species_name
dbl (4): gape_width, trophic_trait, locomotory_trait, mass
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(traits)
fetch_normalised_traits = function(required_species_list) {
required_traits = traits %>% filter(jetz_species_name %in% required_species_list)
required_traits$gape_width_normalised = normalise(required_traits$gape_width, min(required_traits$gape_width), max(required_traits$gape_width))
required_traits$trophic_trait_normalised = normalise(required_traits$trophic_trait, min(required_traits$trophic_trait), max(required_traits$trophic_trait))
required_traits$locomotory_trait_normalised = normalise(required_traits$locomotory_trait, min(required_traits$locomotory_trait), max(required_traits$locomotory_trait))
required_traits$mass_normalised = normalise(required_traits$mass, min(required_traits$mass), max(required_traits$mass))
traits_normalised_long = required_traits %>% pivot_longer(cols = c('gape_width_normalised', 'trophic_trait_normalised', 'locomotory_trait_normalised', 'mass_normalised'), names_to = 'trait', values_to = 'normalised_value') %>% dplyr::select(jetz_species_name, trait, normalised_value)
traits_normalised_long$trait = factor(traits_normalised_long$trait, levels = c('gape_width_normalised', 'trophic_trait_normalised', 'locomotory_trait_normalised', 'mass_normalised'), labels = c('Gape Width', 'Trophic Trait', 'Locomotory Trait', 'Mass'))
traits_normalised_long
}
fetch_normalised_traits(c('Aplopelia_larvata', 'Chalcophaps_indica', 'Caloenas_nicobarica'))
Read in our phylogeny
phylo_tree = read.tree(filename(TAXONOMY_OUTPUT_DIR, 'phylogeny.tre'))
ggtree(phylo_tree, layout='circular')
Load resolve ecoregions
resolve = read_resolve()
Warning: st_buffer does not correctly buffer longitude/latitude datadist is assumed to be in decimal degrees (arc_degrees).
Warning: st_simplify does not correctly simplify longitude/latitude data, dTolerance needs to be in decimal degrees
to_species_matrix = function(filtered_communities) {
filtered_communities %>%
dplyr::select(city_id, jetz_species_name) %>%
distinct() %>%
mutate(present = TRUE) %>%
pivot_wider(
names_from = jetz_species_name,
values_from = "present",
values_fill = list(present = F)
) %>%
tibble::column_to_rownames(var='city_id')
}
community_nmds = function(filtered_communities) {
species_matrix = to_species_matrix(filtered_communities)
nmds <- metaMDS(species_matrix, k=2, trymax = 30)
nmds_result = data.frame(scores(nmds)$sites)
nmds_result$city_id = as.double(rownames(scores(nmds)$sites))
rownames(nmds_result) = NULL
nmds_result
}
https://www.datacamp.com/tutorial/k-means-clustering-r
scree_plot = function(community_nmds_data) {
# Decide how many clusters to look at
n_clusters <- min(10, nrow(community_nmds_data) - 1)
# Initialize total within sum of squares error: wss
wss <- numeric(n_clusters)
set.seed(123)
# Look over 1 to n possible clusters
for (i in 1:n_clusters) {
# Fit the model: km.out
km.out <- kmeans(community_nmds_data[,c('NMDS1','NMDS2')], centers = i, nstart = 20)
# Save the within cluster sum of squares
wss[i] <- km.out$tot.withinss
}
# Produce a scree plot
wss_df <- tibble(clusters = 1:n_clusters, wss = wss)
scree_plot <- ggplot(wss_df, aes(x = clusters, y = wss, group = 1)) +
geom_point(size = 4)+
geom_line() +
scale_x_continuous(breaks = c(2, 4, 6, 8, 10)) +
xlab('Number of clusters')
scree_plot
}
cluster_cities = function(city_nmds, cities_community_data, centers) {
set.seed(123)
kmeans_clusters <- kmeans(city_nmds[,c('NMDS1', 'NMDS2')], centers = centers, nstart = 20)
city_nmds$cluster = kmeans_clusters$cluster
cities_community_data %>% left_join(city_nmds) %>% mutate(cluster = as.factor(cluster))
}
plot_nmds_clusters = function(cluster_cities) {
cluster_cities %>% dplyr::select(city_id, city_name, NMDS1, NMDS2, cluster) %>% distinct() %>%
ggplot(aes(x = NMDS1, y = NMDS2, colour = cluster)) + geom_point() + geom_label_repel(aes(label = city_name))
}
plot_city_cluster = function(city_cluster_data_metrics, title) {
species_in_cluster = communities %>%
filter(city_id %in% city_cluster_data_metrics$city_id) %>%
group_by(jetz_species_name, city_name, present_urban_high, present_urban_med, present_urban_low) %>%
summarise() %>%
mutate(present_score = present_urban_high + present_urban_med + present_urban_low)
species_in_cluster$present_score = factor(species_in_cluster$present_score, levels = c(0, 1, 2, 3), labels = c('Regional Pool', 'Low Threshold', 'Medium Threshold', 'High Threshold'))
tree_cropped <- ladderize(drop.tip(phylo_tree, setdiff(phylo_tree$tip.label, species_in_cluster$jetz_species_name)))
gg_tree = ggtree(tree_cropped)
gg_presence = ggplot(species_in_cluster, aes(x=city_name, y=jetz_species_name)) +
geom_tile(aes(fill=present_score)) + scale_fill_manual(values = c("green", "yellow", "orange", "red")) +
theme_minimal() + xlab(NULL) + ylab(NULL) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(fill='Presence')
species_in_cluster_traits = fetch_normalised_traits(species_in_cluster$jetz_species_name)
gg_traits = ggplot(species_in_cluster_traits, aes(x = trait, y = jetz_species_name, size = normalised_value)) + geom_point() + theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.text.y=element_blank()) + xlab(NULL) + ylab(NULL) + labs(size = "Normalised Value")
gg_cities_mntd = ggplot(city_cluster_data_metrics, aes(x = city_name, y = mntd_normalised, fill = is_urban_threshold)) + geom_bar(stat = "identity", position = position_dodge(preserve = "single")) + scale_fill_manual(values = c("yellow", "orange", "red")) + theme_minimal() + theme(legend.position = "none", axis.text.x=element_blank()) + xlab(NULL) + ylab("MNTD")
gg_cities_fd = ggplot(city_cluster_data_metrics, aes(x = city_name, y = fd_normalised, fill = is_urban_threshold)) + geom_bar(stat = "identity", position = position_dodge(preserve = "single")) + scale_fill_manual(values = c("yellow", "orange", "red")) + theme_minimal() + theme(legend.position = "none", axis.text.x=element_blank()) + xlab(NULL) + ylab("FD")
gg_title = ggplot() + labs(title = title) + theme_minimal()
gg_presence %>% insert_top(gg_cities_mntd, height = 0.5) %>% insert_top(gg_cities_fd, height = 0.5) %>% insert_left(gg_tree, width = 0.75) %>% insert_right(gg_traits, width = 0.5) %>% insert_top(gg_title, height = 0.06)
}
nearctic_cities = community_data_metrics %>% filter(core_realm == 'Nearctic')
nearctic_cities %>% dplyr::select(city_name) %>% distinct() %>% as.list()
$city_name
[1] "San Jose" "Los Angeles" "Concord" "Tijuana" "Bakersfield"
[6] "Fresno" "Sacramento" "Mexicali" "Hermosillo" "Las Vegas"
[11] "Phoenix" "Tucson" "Durango" "Portland" "Chihuahua"
[16] "Aguascalientes" "Seattle" "Ciudad Juárez" "San Luis PotosÃ" "Mexico City"
[21] "Saltillo" "Vancouver" "Albuquerque" "Monterrey" "Nuevo Laredo"
[26] "San Antonio" "Austin" "Houston" "Dallas" "Oklahoma City"
[31] "Calgary" "New Orleans" "Kansas City" "Omaha" "St. Louis"
[36] "Bradenton" "Tampa" "Minneapolis [Saint Paul]" "Atlanta" "Orlando"
[41] "Louisville" "Chicago" "Indianapolis" "Milwaukee" "Virginia Beach"
[46] "New York"
nearctic_cities_nmds = community_nmds(communities %>% filter(city_id %in% nearctic_cities$city_id))
Run 0 stress 0.1161938
Run 1 stress 0.114161
... New best solution
... Procrustes: rmse 0.029527 max resid 0.1653318
Run 2 stress 0.1161938
Run 3 stress 0.1543555
Run 4 stress 0.1459327
Run 5 stress 0.1496269
Run 6 stress 0.1198631
Run 7 stress 0.1148409
Run 8 stress 0.114161
... Procrustes: rmse 0.000006500798 max resid 0.0000166253
... Similar to previous best
Run 9 stress 0.1194245
Run 10 stress 0.1598455
Run 11 stress 0.1161938
Run 12 stress 0.114161
... Procrustes: rmse 0.000008762407 max resid 0.00003399565
... Similar to previous best
Run 13 stress 0.1456538
Run 14 stress 0.1148408
Run 15 stress 0.1148408
Run 16 stress 0.1161938
Run 17 stress 0.1176404
Run 18 stress 0.1152815
Run 19 stress 0.1198631
Run 20 stress 0.1614273
*** Best solution repeated 2 times
nearctic_cities_nmds
scree_plot(nearctic_cities_nmds)
nearctic_cities = cluster_cities(city_nmds = nearctic_cities_nmds, cities_community_data = nearctic_cities, centers = 3)
Joining with `by = join_by(city_id)`
plot_nmds_clusters(nearctic_cities)
nearctic_biomes = st_crop(resolve[resolve$REALM == 'Nearctic',c('REALM')], xmin = -220, ymin = 0, xmax = 0, ymax = 70)
although coordinates are longitude/latitude, st_intersection assumes that they are planar
Warning: attribute variables are assumed to be spatially constant throughout all geometries
ggplot() +
geom_sf(data = nearctic_biomes, aes(geometry = geometry)) +
geom_sf(data = nearctic_cities, aes(geometry = geometry, color = cluster))
nearctic_cities %>% filter(cluster == 1) %>% plot_city_cluster('Neartic cluster 1')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
nearctic_cities %>% filter(cluster == 2) %>% plot_city_cluster('Neartic cluster 2')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
nearctic_cities %>% filter(cluster == 3) %>% plot_city_cluster('Neartic cluster 3')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
neotropic_cities = community_data_metrics %>% filter(core_realm == 'Neotropic')
neotropic_cities %>% dplyr::select(city_name) %>% distinct() %>% as.list()
$city_name
[1] "Culiacán" "Guadalajara" "Morelia" "Acapulco"
[5] "Querétaro" "Cuernavaca" "Puebla" "Oaxaca"
[9] "Xalapa" "Veracruz" "Tuxtla Gutiérrez" "Villahermosa"
[13] "Guatemala City" "San Salvador" "San Pedro Sula" "Mérida"
[17] "Tegucigalpa" "Managua" "San José" "Cancún"
[21] "Guayaquil" "Chiclayo" "Panama City" "Trujillo"
[25] "Quito" "Havana" "Cali" "Lima"
[29] "Pereira" "Miami" "MedellÃn" "Ibagué"
[33] "Cartagena" "Kingston" "Bogota" "Barranquilla"
[37] "Bucaramanga" "Cúcuta" "Maracaibo" "Arequipa"
[41] "Barquisimeto" "Santo Domingo" "Maracay" "El Alto [La Paz]"
[45] "Caracas" "Cochabamba" "Viña del Mar [ValparaÃso]" "RÃo Piedras [San Juan]"
[49] "Barcelona" "Concepción" "Santiago" "Mendoza"
[53] "Salta" "Cordoba" "Asuncion" "Buenos Aires"
[57] "La Plata" "Ciudad del Este" "Montevideo" "Mar del Plata"
[61] "Porto Alegre" "São Paulo" "Santos" "Sao Jose dos Campos"
neotropic_cities_nmds = community_nmds(communities %>% filter(city_id %in% neotropic_cities$city_id))
Run 0 stress 0.134619
Run 1 stress 0.1346369
... Procrustes: rmse 0.001174416 max resid 0.006766458
... Similar to previous best
Run 2 stress 0.1405642
Run 3 stress 0.134619
... New best solution
... Procrustes: rmse 0.00006620163 max resid 0.000198548
... Similar to previous best
Run 4 stress 0.1344509
... New best solution
... Procrustes: rmse 0.006936315 max resid 0.04569844
Run 5 stress 0.1344509
... Procrustes: rmse 0.00003217062 max resid 0.0001023009
... Similar to previous best
Run 6 stress 0.134433
... New best solution
... Procrustes: rmse 0.001185892 max resid 0.00682719
... Similar to previous best
Run 7 stress 0.1348046
... Procrustes: rmse 0.006548543 max resid 0.04605725
Run 8 stress 0.134433
... New best solution
... Procrustes: rmse 0.00004384783 max resid 0.0001411792
... Similar to previous best
Run 9 stress 0.1348237
... Procrustes: rmse 0.006648934 max resid 0.04607877
Run 10 stress 0.1405635
Run 11 stress 0.1405831
Run 12 stress 0.141222
Run 13 stress 0.134433
... Procrustes: rmse 0.00004948105 max resid 0.0001550979
... Similar to previous best
Run 14 stress 0.1406945
Run 15 stress 0.1363857
Run 16 stress 0.1346225
... Procrustes: rmse 0.007131977 max resid 0.04573052
Run 17 stress 0.1348237
... Procrustes: rmse 0.006652901 max resid 0.04610229
Run 18 stress 0.1344509
... Procrustes: rmse 0.001188588 max resid 0.007107747
... Similar to previous best
Run 19 stress 0.134433
... Procrustes: rmse 0.00001263404 max resid 0.00003984932
... Similar to previous best
Run 20 stress 0.1405823
*** Best solution repeated 4 times
neotropic_cities_nmds
scree_plot(neotropic_cities_nmds)
neotropic_cities = cluster_cities(city_nmds = neotropic_cities_nmds, cities_community_data = neotropic_cities, centers = 3)
Joining with `by = join_by(city_id)`
plot_nmds_clusters(neotropic_cities)
neotropic_biomes = resolve[resolve$REALM == 'Neotropic',c('REALM')]
ggplot() +
geom_sf(data = neotropic_biomes, aes(geometry = geometry)) +
geom_sf(data = neotropic_cities, aes(geometry = geometry, color = cluster))
neotropic_cities %>% filter(cluster == 1) %>% plot_city_cluster('Neotropic cluster 1')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
neotropic_cities %>% filter(cluster == 2) %>% plot_city_cluster('Neotropic cluster 2')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
neotropic_cities %>% filter(cluster == 3) %>% plot_city_cluster('Neotropic cluster 3')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
palearctic_cities = community_data_metrics %>% filter(core_realm == 'Palearctic')
palearctic_cities %>% dplyr::select(city_name) %>% distinct() %>% as.list()
$city_name
[1] "Lisbon" "Porto" "Marrakesh" "Seville" "Dublin"
[6] "Málaga" "Madrid" "Glasgow" "Bilbao" "Liverpool"
[11] "Bristol" "Manchester" "Birmingham" "Leeds" "Newcastle upon Tyne"
[16] "Sheffield" "Nottingham" "Valencia" "London" "Toulouse"
[21] "Paris" "Barcelona" "Rotterdam [The Hague]" "Brussels" "Amsterdam"
[26] "Lyon" "Marseille" "Dusseldorf" "Nice" "Frankfurt am Main"
[31] "Zurich" "Oslo" "Stuttgart" "Hamburg" "Genoa"
[36] "Nuremberg" "Copenhagen" "Munich" "Berlin" "Dresden"
[41] "Rome" "Prague" "Stockholm" "Poznan" "Vienna"
[46] "Wroclaw" "Zagreb" "Gdansk" "Budapest" "Krakow"
[51] "Warsaw" "Helsinki" "Riga" "Belgrade" "Lviv"
[56] "Sofia" "Thessaloniki" "Minsk" "Athens" "Kyiv"
[61] "Istanbul" "Odesa" "Samsun" "Luxor" "Tel Aviv"
[66] "Jerusalem" "Tbilisi" "Yerevan" "Abu Dhabi" "Dubai"
[71] "Bishkek"
palearctic_cities_nmds = community_nmds(communities %>% filter(city_id %in% palearctic_cities$city_id))
Run 0 stress 0.04203818
Run 1 stress 0.05819353
Run 2 stress 0.04984179
Run 3 stress 0.06377882
Run 4 stress 0.04580606
Run 5 stress 0.04337554
Run 6 stress 0.1267478
Run 7 stress 0.05064886
Run 8 stress 0.1058666
Run 9 stress 0.08693039
Run 10 stress 0.04500686
Run 11 stress 0.05497039
Run 12 stress 0.04351811
Run 13 stress 0.1568467
Run 14 stress 0.1373195
Run 15 stress 0.0431116
Run 16 stress 0.05064975
Run 17 stress 0.07403216
Run 18 stress 0.04354788
Run 19 stress 0.04203775
... New best solution
... Procrustes: rmse 0.000102071 max resid 0.0003739103
... Similar to previous best
Run 20 stress 0.06461453
*** Best solution repeated 1 times
palearctic_cities_nmds
scree_plot(palearctic_cities_nmds)
palearctic_cities = cluster_cities(city_nmds = palearctic_cities_nmds, cities_community_data = palearctic_cities, centers = 4)
Joining with `by = join_by(city_id)`
plot_nmds_clusters(palearctic_cities)
palearctic_biomes = resolve[resolve$REALM == 'Palearctic',c('REALM')]
ggplot() +
geom_sf(data = palearctic_biomes, aes(geometry = geometry)) +
geom_sf(data = palearctic_cities, aes(geometry = geometry, color = cluster))
palearctic_cities %>% filter(cluster == 1) %>% plot_city_cluster('Palearctic cluster 1')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
palearctic_cities %>% filter(cluster == 2) %>% plot_city_cluster('Palearctic cluster 2')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
palearctic_cities %>% filter(cluster == 3) %>% plot_city_cluster('Palearctic cluster 3')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
palearctic_cities %>% filter(cluster == 4) %>% plot_city_cluster('Palearctic cluster 4')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
afrotropic_cities = community_data_metrics %>% filter(core_realm == 'Afrotropic')
afrotropic_cities %>% dplyr::select(city_name) %>% distinct() %>% as.list()
$city_name
[1] "Cape Town" "Johannesburg" "Pretoria" "Kigali" "Kampala" "Arusha" "Nairobi" "Addis Ababa"
afrotropic_cities_nmds = community_nmds(communities %>% filter(city_id %in% afrotropic_cities$city_id))
Run 0 stress 0.06095916
Run 1 stress 0.06095916
... Procrustes: rmse 0.00001563648 max resid 0.00002846132
... Similar to previous best
Run 2 stress 0.06095904
... New best solution
... Procrustes: rmse 0.0002654117 max resid 0.0004793866
... Similar to previous best
Run 3 stress 0.1246142
Run 4 stress 0.06095907
... Procrustes: rmse 0.0002020247 max resid 0.0003683555
... Similar to previous best
Run 5 stress 0.06095908
... Procrustes: rmse 0.0002145394 max resid 0.0003892005
... Similar to previous best
Run 6 stress 0.06095922
... Procrustes: rmse 0.0003263648 max resid 0.0005977107
... Similar to previous best
Run 7 stress 0.06095938
... Procrustes: rmse 0.0005326933 max resid 0.0009871214
... Similar to previous best
Run 8 stress 0.1246143
Run 9 stress 0.198614
Run 10 stress 0.06095908
... Procrustes: rmse 0.0001415359 max resid 0.0002526003
... Similar to previous best
Run 11 stress 0.0609591
... Procrustes: rmse 0.0001761283 max resid 0.000316345
... Similar to previous best
Run 12 stress 0.06095931
... Procrustes: rmse 0.0003961754 max resid 0.0007336573
... Similar to previous best
Run 13 stress 0.06095922
... Procrustes: rmse 0.0003270253 max resid 0.0005854539
... Similar to previous best
Run 14 stress 0.06095933
... Procrustes: rmse 0.0005079837 max resid 0.0009130458
... Similar to previous best
Run 15 stress 0.126785
Run 16 stress 0.1352924
Run 17 stress 0.06095934
... Procrustes: rmse 0.0003673878 max resid 0.0006914034
... Similar to previous best
Run 18 stress 0.1246142
Run 19 stress 0.1254289
Run 20 stress 0.06095926
... Procrustes: rmse 0.0003421154 max resid 0.0006007065
... Similar to previous best
*** Best solution repeated 12 times
afrotropic_cities_nmds
scree_plot(afrotropic_cities_nmds)
afrotropic_cities = cluster_cities(city_nmds = afrotropic_cities_nmds, cities_community_data = afrotropic_cities, centers = 4)
Joining with `by = join_by(city_id)`
plot_nmds_clusters(afrotropic_cities)
afrotropicbiomes = resolve[resolve$REALM == 'Afrotropic',c('REALM')]
ggplot() +
geom_sf(data = afrotropicbiomes, aes(geometry = geometry)) +
geom_sf(data = afrotropic_cities, aes(geometry = geometry, color = cluster))
afrotropic_cities %>% filter(cluster == 1) %>% plot_city_cluster('Afrotropic cluster 1')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
afrotropic_cities %>% filter(cluster == 2) %>% plot_city_cluster('Afrotropic cluster 2')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
afrotropic_cities %>% filter(cluster == 3) %>% plot_city_cluster('Afrotropic cluster 3')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
afrotropic_cities %>% filter(cluster == 4) %>% plot_city_cluster('Afrotropic cluster 4')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
indomalayan_cities = community_data_metrics %>% filter(core_realm == 'Indomalayan')
indomalayan_cities %>% dplyr::select(city_name) %>% distinct() %>% as.list()
$city_name
[1] "Swat City" "Srinagar" "Jamnagar" "Jammu" "Rajkot" "Bikaner"
[7] "Jodhpur" "Jalandhar" "Ahmedabad" "Bhavnagar" "Ludhiana" "Anand"
[13] "Udaipur" "Surat" "Vadodara" "Ajmer" "Chandigarh" "Vasai-Virar"
[19] "Mumbai" "Jaipur" "Delhi [New Delhi]" "Nashik" "Dehradun" "Kota"
[25] "Pune" "Haridwar" "Dhule" "Ujjain" "Indore" "Ahmadnagar"
[31] "Kolhapur" "Jalgaon" "Agra" "Aurangabad" "Sangli" "Belagavi"
[37] "Gwalior" "Budaun" "Bareilly" "Dharwad" "Bhopal" "Bhind"
[43] "Mangaluru" "Solapur" "Vijayapura" "Akola" "Latur" "Kannur"
[49] "Davanagere" "Thalassery" "Amravati" "Kalaburagi" "Kozhikode" "Guruvayur"
[55] "Malappuram" "Lucknow" "Thrissur" "Mysuru" "Kochi" "Nagpur"
[61] "Kollam" "Jabalpur" "Ettumanoor" "Hyderabad" "Coimbatore" "Bengaluru"
[67] "Thiruvananthapuram" "Tiruppur" "Faizabad" "Erode" "Prayagraj" "Pratapgarh"
[73] "Salem" "Dindigul" "Madurai" "Tiruchirappalli" "Durg" "Vellore"
[79] "Tirupati" "Raipur" "Bilaspur" "Vijayawada" "Puducherry" "Chennai"
[85] "Kathmandu" "Colombo" "Rajamahendravaram" "Patna" "Kandy" "Bihar Sharif"
[91] "Visakhapatnam" "Ranchi" "Brahmapur" "Jamshedpur" "Darjeeling" "Siliguri"
[97] "Cuttack" "Bhubaneshwar" "Jalpaiguri" "Berhampore" "Kolkata" "Krishnanagar"
[103] "Guwahati [Dispur]" "Agartala" "Silchar" "Dimapur" "Bangkok" "George Town"
[109] "Kuala Lumpur" "Phnom Penh" "Singapore" "Hong Kong" "Sha Tin" "Hsinchu"
[115] "Taichung" "New Taipei [Taipei]" "Tainan" "Denpasar" "Kaohsiung" "Kota Kinabalu"
indomalayan_cities_nmds = community_nmds(communities %>% filter(city_id %in% indomalayan_cities$city_id))
Run 0 stress 0.1199339
Run 1 stress 0.1149925
... New best solution
... Procrustes: rmse 0.02333645 max resid 0.2337271
Run 2 stress 0.1462678
Run 3 stress 0.1396866
Run 4 stress 0.1259837
Run 5 stress 0.1423493
Run 6 stress 0.1219108
Run 7 stress 0.1592393
Run 8 stress 0.156701
Run 9 stress 0.1469659
Run 10 stress 0.1179961
Run 11 stress 0.1247678
Run 12 stress 0.147121
Run 13 stress 0.1175222
Run 14 stress 0.1537648
Run 15 stress 0.129813
Run 16 stress 0.1583941
Run 17 stress 0.1413598
Run 18 stress 0.156746
Run 19 stress 0.1550026
Run 20 stress 0.1301376
Run 21 stress 0.1171922
Run 22 stress 0.1674604
Run 23 stress 0.1286946
Run 24 stress 0.1654557
Run 25 stress 0.1318567
Run 26 stress 0.1160446
Run 27 stress 0.120831
Run 28 stress 0.1241018
Run 29 stress 0.1167138
Run 30 stress 0.1589124
*** Best solution was not repeated -- monoMDS stopping criteria:
1: no. of iterations >= maxit
29: stress ratio > sratmax
indomalayan_cities_nmds
scree_plot(indomalayan_cities_nmds)
indomalayan_cities = cluster_cities(city_nmds = indomalayan_cities_nmds, cities_community_data = indomalayan_cities, centers = 4)
Joining with `by = join_by(city_id)`
plot_nmds_clusters(indomalayan_cities)
indomalayan_biomes = resolve[resolve$REALM == 'Indomalayan',c('REALM')]
ggplot() +
geom_sf(data = indomalayan_biomes, aes(geometry = geometry)) +
geom_sf(data = indomalayan_cities, aes(geometry = geometry, color = cluster))
indomalayan_cities %>% filter(cluster == 1) %>% plot_city_cluster('Indomalayan cluster 1')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
indomalayan_cities %>% filter(cluster == 2) %>% plot_city_cluster('Indomalayan cluster 2')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
indomalayan_cities %>% filter(cluster == 3) %>% plot_city_cluster('Indomalayan cluster 3')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.
indomalayan_cities %>% filter(cluster == 4) %>% plot_city_cluster('Indomalayan cluster 4')
`summarise()` has grouped output by 'jetz_species_name', 'city_name', 'present_urban_high', 'present_urban_med'. You can override using the `.groups` argument.